import os
import sys
sys.path.append('../../')
from DataProcessing.tools.HdfsTools import HdfsTools


if __name__ == '__main__':
    # Script entry point: run the DataRD.py job on the hadoop runner against
    # the raw UserBehavior.csv, then give the job's output part file a stable
    # name inside the HDFS output directory.

    # Local import: only this entry point needs subprocess.
    import subprocess

    # Debug aid: show the executable search path that will be used to resolve
    # "python" below. Flushed so it appears before the child's output.
    print(os.environ.get('PATH', ''), flush=True)

    output_dir = "hdfs://localhost:9000/data_clean/remove_duplicate/"
    data_path = "hdfs://localhost:9000/src_data/UserBehavior.csv"

    # Argument list instead of a shell string: no shell parsing or quoting
    # issues. Equivalent to:
    #   python DataRD.py -r hadoop -o <output_dir> <data_path>
    run_cmd = [
        "python", "DataRD.py",
        "-r", "hadoop",
        "-o", output_dir,
        data_path,
    ]

    completed = subprocess.run(run_cmd)
    if completed.returncode != 0:
        # Do not touch HDFS when the job failed: the part file would be
        # missing, or left over from an earlier run.
        print(
            "DataRD.py failed with exit status %d; skipping HDFS rename"
            % completed.returncode,
            file=sys.stderr,
        )
        sys.exit(completed.returncode)

    # NOTE(review): assumes the job writes a single output file named
    # part-00000 — confirm; additional part files would not be renamed.
    ht = HdfsTools()
    ht.rename('/data_clean/remove_duplicate/part-00000', '/data_clean/remove_duplicate/UB_clean.csv')
